#### 1. Setting up the environment for analysis ------

rm( list = ls() ) # clearing our environment 

# Installing necessary packages 
install.packages("lme4")
install.packages("lmerTest")
install.packages("readstata13")
install.packages("broom.mixed")
install.packages("naniar")


library(lme4)
library(lmerTest)
require(readstata13)
library(broom.mixed)
library(LMest)
library(naniar)
library(rstudioapi)   

# Set working directory 

setwd(dirname(getActiveDocumentContext()$path))

getwd()






#### 2. Parental preference models ------- 

# Loading in the data 

dat_BP <- read.dta13("long_both_parents.dta")


# Fitting the baseline model 

baseline <- glmer(voteConPW ~ patvalW_halfSD + wave + (1|osm_hh/pidp), 
                     family= binomial(link = "logit"),
                     nAGQ=0,
                     data = dat_BP)
summary(baseline)

# calculating confidence intervals and ORs

cc_b <- confint(baseline,parm="beta_",method="Wald")
ctab_b <- cbind(est=fixef(baseline),cc_b)
rtab_b <- exp(ctab_b)
print(cc_b,digits=2)
print(rtab_b,digits=3)

# Fitting the covariate model 

covariate <- glmer(voteConPW ~ patvalW_halfSD + educW + ethnicW + incomeW_SD + sexW + ageW + occupation_statusW + regionW + mortgageW + childrenW + maritalW + unemployedW + wave + (1|osm_hh/pidp), 
                       family= binomial(link = "logit"),
                       nAGQ=0,
                       data = dat_BP)
summary(covariate)

# calculating confidence intervals and ORs

cc_c <- confint(covariate,parm="beta_",method="Wald")
ctab_c <- cbind(est=fixef(covariate),cc_c)
rtab_c <- exp(ctab_c)
print(cc_c,digits=1)
print(rtab_c,digits=1)

# Fitting the model with parental preferences included

covariate_parents <- glmer(voteConPW ~ patvalW_halfSD + mother_voteConW + father_voteConW + incomeW_SD + educW + ethnicW + sexW + ageW + occupation_statusW + regionW + mortgageW + childrenW + maritalW + unemployedW + wave + (1|osm_hh/pidp), 
                       family= binomial(link = "logit"),
                       nAGQ=0,
                       data = dat_BP)
summary(covariate_parents)

# calculating confidence intervals and ORs

cc_p <- confint(covariate_parents,parm="beta_",method="Wald")
ctab_p <- cbind(est=fixef(covariate_parents),cc_p)
rtab_p <- exp(ctab_p)
print(cc_p,digits=2)
print(rtab_p,digits=1)

#### 3. Markov models ---------

# Installing necessary packages 

install.packages("LMest")
install.packages("MultiLCIRT")

library(LMest)
library(MultiLCIRT)

# Loading in the data

markov_dat <- read.dta13("markov_data_4waves.dta")
markov_dat_3 <- read.dta13("markov_data_3waves.dta")

# Testing time-heterogeneity assumption 

markov_uni_het <- lmest(responsesFormula = voteConPW ~ NULL,
                        data = markov_dat, index = c("pidp","wave"), k = 2, start = 0,
                        modSel = c("BIC", "AIC"), modBasic = 0,
                        paramLatent = "multilogit",
                        weights = NULL, tol = 10^-8, maxit = 1000,
                        out_se = TRUE, q = NULL, output = FALSE,
                        fort = TRUE, seed = NULL, ntry = 0)

markov_uni_hom <- lmest(responsesFormula = voteConPW ~ NULL,
                        data = markov_dat, index = c("pidp","wave"), k = 2, start = 0,
                        modSel = c("BIC", "AIC"), modBasic = 1,
                        paramLatent = "multilogit",
                        weights = NULL, tol = 10^-8, maxit = 1000,
                        out_se = TRUE, q = NULL, output = FALSE,
                        fort = TRUE, seed = NULL, ntry = 0)

print(markov_uni_het)
print(markov_uni_hom)

## Univariate model - Conservative voting only

markov_uni <- lmest(responsesFormula = voteConPW ~ NULL,
                    data = markov_dat, index = c("pidp","wave"), k = 2, start = 0,
                    modSel = c("BIC", "AIC"), modBasic = 0,
                    paramLatent = "multilogit",
                    weights = NULL, tol = 10^-8, maxit = 1000,
                    out_se = TRUE, q = NULL, output = FALSE,
                    fort = TRUE, seed = NULL, ntry = 0)

print(markov_uni)
summary(markov_uni)

## Estimating the covariate model - with Patrimony

markov_cov <- lmest(responsesFormula = voteConPW ~ NULL, 
                    latentFormula = ~ I(patcount_4W) + I(patvalW_halfSD) 
                    + I(genderW) + I(mortgageW) + I(incomeW_SD)
                    + I(ethnicW) + I(educW) + I(ageW - 18)
                    + I(occupation_statusW == 2) + I(occupation_statusW == 3)
                    + I(regionW == 1) + I(regionW == 2) + I(regionW == 3) 
                    + I(regionW == 4) + I(regionW == 5) + I(regionW == 6) 
                    + I(regionW == 7) + I(regionW == 9) + I(regionW == 10) 
                    + I(regionW == 11) + I(childrenW) + I(maritalW) + I(retiredW) + I(unemployedW) | I(patcount_4W) + I(patvalW_halfSD) 
                    + I(genderW) + I(mortgageW) + I(incomeW_SD) 
                    + I(ethnicW) + I(educW)  + I(ageW - 18) 
                    + I(occupation_statusW == 2) + I(occupation_statusW == 3) 
                    + I(regionW == 1) + I(regionW == 2) + I(regionW == 3) 
                    + I(regionW == 4) + I(regionW == 5) + I(regionW == 6) 
                    + I(regionW == 7) + I(regionW == 9) + I(regionW == 10) 
                    + I(regionW == 11) + I(childrenW) + I(maritalW) + I(retiredW) + I(unemployedW),
                    data = markov_dat, index = c("pidp","wave"), k = 2, start = 0,
                    modSel = c("BIC", "AIC"), modBasic = 0,
                    paramLatent = "multilogit",
                    weights = NULL, tol = 10^-8, maxit = 1000,
                    out_se = TRUE, q = NULL, output = FALSE,
                    fort = TRUE, seed = NULL, ntry = 0)

print(markov_cov)
summary(markov_cov)

## Estimating the model which includes patrimony change scores

markov_ch <- lmest(responsesFormula = voteConPW ~ NULL, 
                     latentFormula = ~ I(patcount_4W) + I(patvalW_halfSD)
                     + I(patcount_incW) + I(patval_diffWSD) + I(genderW) + I(mortgageW) + I(incomeW_SD)
                     + I(ethnicW) + I(educW) + I(ageW - 18)
                     + I(occupation_statusW == 2) + I(occupation_statusW == 3)
                     + I(regionW == 1) + I(regionW == 2) + I(regionW == 3) 
                     + I(regionW == 4) + I(regionW == 5) + I(regionW == 6) 
                     + I(regionW == 7) + I(regionW == 9) + I(regionW == 10) 
                     + I(regionW == 11) + I(maritalW) + I(retiredW) + I(unemployedW) | I(patcount_4W) + I(patvalW_halfSD) 
                     + I(patcount_incW) + I(patval_diffWSD) + I(genderW) + I(mortgageW) + I(incomeW_SD)
                     + I(ethnicW) + I(educW) + I(ageW - 18)
                     + I(occupation_statusW == 2) + I(occupation_statusW == 3)
                     + I(regionW == 1) + I(regionW == 2) + I(regionW == 3) 
                     + I(regionW == 4) + I(regionW == 5) + I(regionW == 6) 
                     + I(regionW == 7) + I(regionW == 9) + I(regionW == 10) 
                     + I(regionW == 11) + I(maritalW) + I(retiredW) + I(unemployedW),
                     data = markov_dat_3, index = c("pidp","wave"), k = 2, start = 0,
                     modSel = c("BIC", "AIC"), modBasic = 0,
                     paramLatent = "multilogit",
                     weights = NULL, tol = 10^-8, maxit = 1000,
                     out_se = TRUE, q = NULL, output = FALSE,
                     fort = TRUE, seed = NULL, ntry = 0)

print(markov_ch)
summary(markov_ch)


